################################################################################
##
## Purpose: This script prepares the bills which pertain to votes to constrain the FED.
##
## Author: James Bisbee (james.h.bisbee@vanderbilt.edu)
##
##  - Inputs:
##    - ./data/raw/bills/congressional bills 1947-2016 (with audit for bisbee).dta: Replication data provided by Binder
##    - ./data/raw/politicians/new_legs_lookup_full.csv: Raw data from https://github.com/unitedstates/congress-legislators, manually prepared by Bisbee
##    - ./data/prepped/hearings/cleaned_docs.csv: Prepped data from 1_DATA_hearings_prep.R
##  - Outputs:
##    - ./data/prepped/bills/binder_bills_merged.RData
##
##
## See associated log file for compute environment, package versions, 
##  and date of most recent run.
##
################################################################################
# Start from an empty workspace and release memory before loading anything.
rm(list = ls())
gc()

# library() stops immediately when a package is not installed. require() only
# warns and returns FALSE, which lets the script run on and fail later with an
# unrelated "could not find function" error.
library(tidyverse)
library(jsonlite)
library(utils)
library(xml2)

# Seed the random number generator so any random draws are repeatable.
set.seed(123)

# Compute details: print a description of the machine this run is executed on.
print(paste0('Compute environment from ',Sys.Date(),' run by Bisbee'))
os_name <- Sys.info()[['sysname']]
if (os_name == 'Windows') {
  # wmic prints a header line followed by the requested values.
  ram_size <- system("wmic MemoryChip get Capacity", intern = TRUE)[-1]
  model_name <- system("wmic cpu get name", intern = TRUE)[2] # nocov
  vendor_id <- system("wmic cpu get manufacturer", intern = TRUE)[2] # nocov

  print(list(ram = stringr::str_squish(ram_size)[1],
             vendor_id = stringr::str_squish(vendor_id),
             model_name = stringr::str_squish(model_name),
             no_of_cores = parallel::detectCores()))
} else if (os_name == 'Linux') {
  # Sys.info() reports 'Linux'; the earlier comparison against 'Linuxs' could
  # never be TRUE, so this branch was unreachable.
  ps_lines <- system("ps -C rsession -o %cpu,%mem,pid,cmd", intern = TRUE)
  # ps right-aligns its numeric columns with a variable number of spaces, so
  # trim each line and split on runs of whitespace rather than single spaces.
  # The first line is the column header and is dropped.
  ps_fields <- strsplit(trimws(ps_lines[-1]), "[[:space:]]+")
  df <- do.call(rbind, lapply(ps_fields,
                              function(x) data.frame(
                                cpu = as.numeric(x[1]),
                                mem = as.numeric(x[2]),
                                pid = as.numeric(x[3]),
                                # everything after the pid is the command line
                                cmd = paste(x[-c(1:3)], collapse = " "))))
  # df is NULL when ps lists no rsession process (e.g. a run outside RStudio).
  print(df)
} else {
  cat("If not on Linux or Windows, you'll have to figure out your own solution to seeing the compute environment.")
}

sessionInfo()


# Starting with Binder's bills
bills <- haven::read_dta('./data/raw/bills/congressional bills 1947-2016 (with audit for bisbee).dta')
# Legislator lookup; col_select = -1 drops the first column of the csv.
idlookupFull <- read_csv('./data/raw/politicians/new_legs_lookup_full.csv',col_select = -1)

# Sponsor surname: str_extract() keeps the text of `name` up to and including
# the first comma (NA when there is no comma); gsub() then strips the comma and
# everything up to a space. Both functions are vectorised, so the whole column
# is handled in one call. This replaces a row-by-row loop over 1:nrow(bills),
# which rewrote the column on every iteration and would index rows 1 and 0 if
# the data were empty.
bills$lname <- gsub('.*? |,','',str_extract(bills$name,'.*?,'))

# Three-way party label from the numeric party code: 200 -> GOP, 100 -> DEM,
# any other non-missing code -> IND. A missing code stays missing.
bills <- mutate(bills,
                Party = case_when(is.na(party) ~ NA_character_,
                                  party == 200 ~ 'GOP',
                                  party == 100 ~ 'DEM',
                                  TRUE ~ 'IND'))

# Lookup from the first seven characters of each upper-cased state name to the
# state's postal abbreviation.
statelookup <- data.frame(stab = state.abb,
                          statenm = substr(toupper(state.name),1,7))[, c('statenm','stab')]

# Truncate the bills' state name to the same seven characters, repair two
# misspellings, then attach the abbreviation (natural join on shared columns).
# NOTE(review): a seven-character value 'OAKLAHO' becomes 'OKLAHOO' after the
# second replacement, which would not equal the lookup key 'OKLAHOM' -- confirm
# against the raw spelling in the data.
bills <- bills %>%
  mutate(statenm = substr(statenm,1,7),
         statenm = gsub('DELEWA','DELAWA',statenm),
         statenm = gsub('OAKLAH','OKLAHO',statenm)) %>%
  left_join(statelookup)

# What a huge pain to merge these...easiest is probably to get opensecretsIDs for each?
#
# Builds `toMatch`: one row per distinct (name, lname, idno, Party, stab, year,
# district) combination in `bills` with year > 1990, followed by a series of
# hand-coded corrections to Party, stab and district keyed on the sponsor's
# last name. Rows whose stab is still missing at the end are dropped.
#
# The corrections sit inside a single mutate(), whose arguments are evaluated
# in order, so the district conditions further down see the stab values
# assigned above them.
#
# NOTE(review): ifelse() returns NA wherever its test is NA, so any row whose
# lname is NA comes out of the first Party/stab overrides with NA Party and NA
# stab, and is then removed by drop_na(stab).
toMatch <- bills %>%
  select(name,lname,idno,Party,stab,year,district) %>%
  distinct() %>%
  filter(year > 1990) %>%
  # Missing district becomes 0.
  mutate(district = ifelse(is.na(district),0,district)) %>%
  # Rows with stab 'DE' get district 1. Where stab is NA this yields NA again,
  # which the final mutate() below turns back into 0.
  mutate(district = ifelse(stab == 'DE',1,district)) %>%
  # Party overrides by last name (two of them also conditioned on state).
  mutate(Party = ifelse(lname == 'Bachus','GOP',Party),
         Party = ifelse(lname == 'Grimm','GOP',Party),
         Party = ifelse(lname == 'Hagan','DEM',Party),
         Party = ifelse(lname == 'Manchin','DEM',Party),
         Party = ifelse(lname == 'Sanders','IND',Party),
         Party = ifelse(lname == 'Sherman','DEM',Party),
         Party = ifelse(lname == 'Brady' & stab == 'TX','GOP',Party),
         Party = ifelse(lname == 'King' & stab == 'ME','IND',Party),
         # State overrides by last name; the later ones are restricted to a
         # specific year and/or district.
         stab = ifelse(lname == 'DeMint','SC',stab),
         stab = ifelse(lname == 'Sherman','CA',stab),
         stab = ifelse(lname == 'Hagan','NC',stab),
         stab = ifelse(lname == 'McHenry','NC',stab),
         stab = ifelse(lname == 'Merkley','OR',stab),
         stab = ifelse(lname == 'Pittenger','NC',stab),
         stab = ifelse(lname == 'Rounds','SD',stab),
         stab = ifelse(lname == 'Kanjorski','PA',stab),
         stab = ifelse(lname == 'Lankford' & year == 2014,'OK',stab),
         stab = ifelse(lname == 'Johnson' & year == 2007 & district == 30,'TX',stab),
         stab = ifelse(lname == 'Kucinich' & year == 2008 & district == 10,'OH',stab),
         stab = ifelse(lname == 'LaTourette' & year == 2008 & district == 19,'OH',stab),
         stab = ifelse(lname == 'Nethercutt' & year == 2004 & district == 5,'WA',stab),
         stab = ifelse(lname == 'Johanns' & year == 2013,'NE',stab),
         stab = ifelse(lname == 'Ross' & year == 2006 & district == 4,'AR',stab),
         # District overrides, conditioned on last name, (corrected) state and
         # year.
         # NOTE(review): presumably these align district with the values used
         # in the legislator lookup joined further down -- confirm.
         district = ifelse(lname == 'Dreier' & stab == 'CA' & year == 2008,
                           26,district),
         district = ifelse(lname == 'Grimm' & stab == 'NY' & year == 2012,
                           13,district),
         district = ifelse(lname == 'Kennedy' & stab == 'MN' & year == 2005,
                           6,district),
         district = ifelse(lname == 'LaFalce' & stab == 'NY' & year %in% 2000:2001,
                           29,district),
         district = ifelse(lname == 'LaTourette' & stab == 'OH' & year %in% 2008,
                           14,district),
         district = ifelse(lname == 'Posey' & stab == 'FL' & year %in% 2009:2013,
                           15,district),
         district = ifelse(lname == 'Renacci' & stab == 'OH' & year %in% 2011:2019,
                           16,district),
         district = ifelse(lname == 'Schweikert' & stab == 'AZ' & year %in% 2011:2013,
                           5,district),
         district = ifelse(lname == 'Speier' & stab == 'CA' & year %in% 2008:2013,
                           12,district),
         district = ifelse(lname == 'Sanders' & stab == 'VT' & year %in% 2009:2015,
                           1,district),
         district = ifelse(lname == 'Dorgan' & stab == 'ND',
                           1,district),
         district = ifelse(lname == 'Lankford' & year %in% 2014:2015,
                           5,district)) %>%
  # The overrides can make previously different rows identical.
  distinct() %>%
  # Re-fill any district that became NA above, then drop rows without a state.
  mutate(district = ifelse(is.na(district),0,district)) %>%
  drop_na(stab)

# Spot checks: how does Mike Johanns appear in each source? Every expression
# below is only printed; nothing is assigned.

# In the sponsor rows built from the bills data.
filter(toMatch, lname == 'Johanns')

# In the raw legislator lookup, counted by opensecrets id.
idlookupFull %>%
  filter(first == 'Mike' & last == 'Johanns') %>%
  count(first,last,opensecrets) %>%
  print(n = 22)

# In the bills data itself.
select(filter(bills, lname == 'Johanns'),
       name,lname,stab,district,party,year)

# In the lookup after the column renaming and accent clean-up that are also
# applied when building idLookupMerge.
idlookupFull %>%
  select(opensecretsID = opensecrets,year,
         first,lname = last,party,district,stab = state,fname = first) %>%
  mutate(lname = utf8::utf8_encode(lname),
         lname = gsub('\\\\xe9','e',lname),
         lname = gsub('\\\\xe1','a',lname),
         lname = gsub('\\\\xf3','o',lname),
         lname = gsub('\\\\xfa','u',lname)) %>%
  filter(lname == 'Johanns')

# Legislator lookup in the shape needed to join onto toMatch: renamed columns,
# escaped accent codes in last names replaced by plain letters, a handful of
# opensecrets ids filled in by hand, and a Party label on the same
# GOP / DEM / IND scale used for the bills.
idLookupMerge <- idlookupFull %>%
  select(opensecretsID = opensecrets,year,
         first,lname = last,party,district,stab = state,fname = first) %>%
  # utf8_encode() output can contain literal "\xNN" escapes; swap the four
  # handled here for their unaccented letters.
  mutate(lname = utf8::utf8_encode(lname),
         lname = gsub('\\\\xe9','e',lname),
         lname = gsub('\\\\xe1','a',lname),
         lname = gsub('\\\\xf3','o',lname),
         lname = gsub('\\\\xfa','u',lname)) %>%
  # Hand-assigned ids. As with any ifelse(), a test that evaluates to NA
  # yields an NA id for that row.
  mutate(
    opensecretsID = ifelse(lname == 'LaFalce' & stab == 'NY',
                           'N00001305',opensecretsID),
    opensecretsID = ifelse(lname == 'Roukema' & stab == 'NJ',
                           'N00000740',opensecretsID),
    opensecretsID = ifelse(lname == 'Gonzalez' & stab == 'TX' & fname == 'Henry',
                           'N00005961',opensecretsID),
    opensecretsID = ifelse(lname == 'Hamilton' & stab == 'IN' & fname == 'Lee',
                           'N00003887',opensecretsID),
    opensecretsID = ifelse(lname == 'Kassebaum' & stab == 'KS',
                           'N00005258',opensecretsID),
    opensecretsID = ifelse(lname == 'Metcalf' & fname == 'Jack',
                           'N00007895',opensecretsID),
    # opensecretsID = ifelse(lname == 'Johanns' & fname == 'Mike','N00029444',opensecretsID),
    opensecretsID = ifelse(lname == 'Campbell' & fname == 'Tom' & stab == 'CA',
                           'N00007377',opensecretsID)
  ) %>%
  drop_na(opensecretsID) %>%
  # Same three-way label as in the bills data; a missing party stays missing.
  mutate(Party = case_when(is.na(party) ~ NA_character_,
                           party == 'Republican' ~ 'GOP',
                           party == 'Democrat' ~ 'DEM',
                           TRUE ~ 'IND')) %>%
  distinct() %>%
  # Missing district becomes 0, then every DE / VT / ND row is set to 1.
  mutate(district = ifelse(is.na(district),0,district),
         district = ifelse(stab %in% c('DE','VT','ND'),1,district))

# Some issues with the 90s, but only 8, and i'm not going to sweat it
# (author's note). Lists the sponsor rows that find no opensecrets id in the
# lookup, sorted by last name.
unmatchedSponsors <- left_join(toMatch, idLookupMerge)
unmatchedSponsors <- filter(unmatchedSponsors, is.na(opensecretsID))
print(arrange(unmatchedSponsors, lname), n = 22)

# Spot check: Johanns in the lookup.
filter(idLookupMerge, lname == 'Johanns')

# Ok so we are just redoing tons of work here, but at least we have a merge (again)
# Attach the lookup columns to toMatch (natural join on the shared columns).
toMatch <- left_join(toMatch, idLookupMerge)

# Spot check: Johanns after the merge.
filter(toMatch, lname == 'Johanns')

# Printed only: bill rows that pick up an opensecrets id from toMatch.
billSponsorKeys <- select(bills, name,lname,idno,Party,stab,year)
filter(left_join(billSponsorKeys, toMatch), !is.na(opensecretsID))

# Prepped hearings data: drop the unnamed first column that read_csv() labels
# `...1` and add a fresh running row id X1.
hearings <- read_csv('./data/prepped/hearings/cleaned_docs.csv')
hearings <- select(hearings, -`...1`)
hearings <- mutate(hearings, X1 = row_number())

# Sponsor names already linked to an opensecrets id.
matchedSponsorNames <- toMatch %>%
  select(opensecretsID,name) %>%
  distinct() %>%
  drop_na(opensecretsID)

# Hearing speakers with no sponsor name attached, excluding ids that contain
# 'FED'. lname is the speaker string with everything up to the last space, and
# any periods, removed.
tosearch <- hearings %>%
  select(speaker,opensecretsID,nameHearings = name) %>%
  distinct() %>%
  left_join(matchedSponsorNames) %>%
  filter(is.na(name), !grepl('FED',opensecretsID)) %>%
  count(speaker,nameHearings) %>%
  mutate(lname = gsub('^.* |\\.','',speaker))

print(tosearch, n = 163)
  
# For every row of `candidates`, find the rows of `pool` whose `name` matches
# that candidate's `lname` (used as a regular expression, as grepl() does by
# default) and return the distinct (name, Party, stab) hits with the candidate
# row bound alongside.
#
#  - candidates: data frame with an `lname` column (here: tosearch)
#  - pool:       data frame with `name`, `Party` and `stab` columns
#  - returns:    one data frame of all hits in candidate order; it has zero
#                rows when nothing matches
#
# seq_len() keeps the iteration empty when `candidates` has no rows (1:nrow()
# would visit rows 1 and 0), and the per-candidate results are combined once
# at the end instead of growing a data frame inside a loop.
findSponsorMatches <- function(candidates, pool) {
  hits <- lapply(seq_len(nrow(candidates)), function(i) {
    surname <- candidates$lname[[i]]
    matched <- pool %>%
      # .env$ makes sure the local surname is used even if `pool` happens to
      # contain a column of the same name.
      filter(grepl(.env$surname, name)) %>%
      select(name,Party,stab) %>%
      distinct()
    if (nrow(matched) == 0) {
      return(NULL)
    }
    # The single candidate row is recycled across all matched rows.
    bind_cols(matched, slice(candidates, i))
  })
  bind_rows(hits)
}

# Unmatched hearing speakers whose last name appears among the merged sponsors.
missingToMatch <- findSponsorMatches(tosearch, toMatch)

# Ok...no issues here. But what about with the full bills data?
missingToMatch %>%
  print(n = 40)


# Same search against every sponsor name in the full bills data.
missingBills <- findSponsorMatches(tosearch, bills)

# Issue with Mike Johanns...just want to add him to the toMatch object
missingBills %>%
  print(n = 53)

# Okay...so we now (theoretically) have all the people at these hearings who also
#   sponsored one of the bills merged. Of the 157 who aren't merged, we just assume
#   that they didn't sponsor one of these bills from the Binder data.
count(tosearch, lname)

# One opensecrets id per sponsor key, taken from the merged toMatch rows.
sponsorIDs <- toMatch %>%
  select(name,lname,idno,Party,stab,opensecretsID) %>%
  distinct()

# Have 477 bills that these politicians sponsored (author's note).
# Per sponsor and bill type: the sum of the indicator (totSpons), the number of
# non-missing values (totBills) and their mean (meanSpons), spread to one row
# per opensecrets id with one column per statistic and bill type.
# NOTE(review): the natural join below matches on name, lname, idno, Party and
# stab. Party and stab were hand-corrected in toMatch but not in bills, so bill
# rows for the corrected sponsors may fail to match here -- verify.
preppedBills <- bills %>%
  select(name,lname,idno,year,stab,district,Party,
         empower,centralize,oversight,addirects,constrain,
         decentralize,independence,regulatory,monetary,audit) %>%
  left_join(sponsorIDs) %>%
  drop_na(opensecretsID) %>%
  select(-c(name,lname,idno,year,stab,district,Party)) %>%
  gather(billType,vote,-opensecretsID) %>%
  group_by(opensecretsID,billType) %>%
  summarise(totSpons = sum(vote,na.rm=TRUE),
            totBills = sum(!is.na(vote)),
            meanSpons = mean(vote,na.rm=TRUE)) %>%
  ungroup() %>%
  filter(totBills > 0) %>%
  pivot_wider(names_from = 'billType',
              values_from = c('totSpons','totBills','meanSpons'))

# Corresponds to 195 people, although lots of these types of bills are missing data.
# Number of missing values in each column of preppedBills.
vapply(preppedBills, function(column) sum(is.na(column)), integer(1))



# Probably the easiest is to just dummy these. Did a politician ever sponsor a bill
#   calling to audit the FED?
# Sponsorship summaries flagged with anyBill = 1, ready to attach to speakers.
sponsorSummary <- mutate(preppedBills, anyBill = 1)

# For every hearing speaker with an opensecrets id: three net sponsorship
# counts (constrain - empower, centralize - decentralize, oversight -
# independence) and the anyBill flag. Speakers without sponsorship data, and
# any difference that comes out NA, are set to 0.
# NOTE(review): `party` is used only inside distinct() and dropped afterwards,
# so an id recorded with more than one party would appear in several identical
# rows -- confirm ids are unique in the output.
# NOTE(review): a difference is NA (and hence 0 here) whenever either of its
# two totals is missing, even if the other is non-zero -- confirm intended.
finalBills <- hearings %>%
  select(opensecretsID,party) %>%
  distinct() %>%
  left_join(sponsorSummary) %>%
  drop_na(opensecretsID) %>%
  mutate(constrain_empower_tot = totSpons_constrain - totSpons_empower,
         cent_decent_tot = totSpons_centralize - totSpons_decentralize,
         oversight_indep_tot = totSpons_oversight - totSpons_independence) %>%
  select(opensecretsID,matches('_tot$'),anyBill) %>%
  mutate(across(-opensecretsID, ~ ifelse(is.na(.x),0,.x)))


save(finalBills,file = './data/prepped/bills/binder_bills_merged.RData')
